package de.tuberlin.dima.aim3.oc.input;

import java.util.List;

import de.tuberlin.dima.aim3.oc.Constants;
import de.tuberlin.dima.aim3.oc.input.custom.WikiPage;

/**
 * Base class for parsers that turn Wikipedia XML dump content into
 * {@link WikiPage} objects.
 * <p>
 * Concrete subclasses supply the actual parsing logic; this class only
 * contributes a convenience overload that accepts a page as a string.
 * 
 * @author Florian Feigenbutz <florian.feigenbutz@campus.tu-berlin.de>
 * 
 */
public abstract class WikiDumpParser {

  /**
   * Parses a single page given as a string.
   * <p>
   * The string is encoded to bytes with {@link Constants#CHARSET} and handed to
   * {@link #readWikiDumpPage(byte[])}.
   * 
   * @param pageAsString
   *          textual content of the page; must not be {@code null}
   * @return whatever {@link #readWikiDumpPage(byte[])} returns for the encoded
   *         bytes
   */
  public WikiPage readWikiDumpPage(String pageAsString) {
    return readWikiDumpPage(pageAsString.getBytes(Constants.CHARSET));
  }

  /**
   * Parses a single page given as raw bytes.
   * 
   * @param pageAsBytes
   *          encoded content of the page
   * @return the page produced by the concrete parser
   */
  public abstract WikiPage readWikiDumpPage(byte[] pageAsBytes);

  /**
   * Reads pages without an explicit input argument.
   * <p>
   * NOTE(review): the input source is not visible in this class; presumably it
   * is configured by the concrete subclass - confirm against implementations.
   * 
   * @return the pages produced by the concrete parser
   * @throws Exception
   *           if the concrete parser fails
   */
  public abstract List<WikiPage> readWikiDumpPage() throws Exception;

}